Platform Explorer / Nuxeo Platform 6.0

Contribution org.nuxeo.ecm.platform.categorization.defaultSubjectsCategorizer--categorizers

This contribution is part of XML component org.nuxeo.ecm.platform.categorization.defaultSubjectsCategorizer inside nuxeo-platform-categorization-subjects-6.0.jar

Documentation

Default categorizer to guess the dc:subjects property of English documents.

Extension Point

Extension point categorizers of component DocumentCategorizationService.

Contributed Items

  • <categorizer enabled="true" factory="org.nuxeo.ecm.platform.categorization.categorizer.tfidf.TfIdfCategorizerFactory" maxSuggestions="3" minTextLength="200" model="models/topics-51-tfidf-65536-model.gz" name="dc:subjects" precisionThreshold="1.5" property="dc:subjects">
         <!-- Categorizer named "dc:subjects" targeting the dc:subjects property.
              The factory attribute names the TfIdfCategorizerFactory class and the model
              attribute points at a gzipped model resource.
              NOTE(review): maxSuggestions, minTextLength and precisionThreshold are tuning
              values whose exact semantics are defined by the factory; confirm there. -->
         <!-- Skip rule referencing the Folderish facet; presumably documents carrying
              this facet are not categorized (confirm against the service). -->
         <skip>
          <facet name="Folderish"/>
         </skip>
         <!-- Maps each outcome name to a "group/entry" value.
              NOTE(review): names and values such as "danse", "phylosophy" and "art/paint"
              are identifiers that probably have to match the model and the target
              vocabulary; do not "correct" their spelling without checking both. -->
         <mapping>
           <outcome name="architecture">art/architecture</outcome>
           <outcome name="comics">art/comics</outcome>
           <outcome name="cinema">art/cinema</outcome>
           <outcome name="culture">art/culture</outcome>
           <outcome name="danse">art/danse</outcome>
           <outcome name="art history">art/art history</outcome>
           <outcome name="literature">art/literature</outcome>
           <outcome name="music">art/music</outcome>
           <outcome name="painting">art/paint</outcome>
           <outcome name="photography">art/photography</outcome>
           <outcome name="show">art/show</outcome>
           <outcome name="rights">human sciences/rights</outcome>
           <outcome name="economy">human sciences/economy</outcome>
           <outcome name="geography">human sciences/geography</outcome>
           <outcome name="history">human sciences/history</outcome>
           <outcome name="journalism">human sciences/information</outcome>
           <outcome name="languages">human sciences/languages</outcome>
           <outcome name="phylosophy">human sciences/phylosophy</outcome>
           <outcome name="psychology">human sciences/psychology</outcome>
           <outcome name="sociology">human sciences/sociology</outcome>
           <outcome name="astronomy">sciences/astronomy</outcome>
           <outcome name="biology">sciences/biology</outcome>
           <outcome name="chemistry">sciences/chemistry</outcome>
           <outcome name="information technology">sciences/it</outcome>
           <outcome name="logic">sciences/logic</outcome>
           <outcome name="math">sciences/math</outcome>
           <outcome name="medicine">sciences/medicine</outcome>
           <outcome name="physic">sciences/physic</outcome>
           <outcome name="earthscience">sciences/earthscience</outcome>
           <outcome name="education">society/education</outcome>
           <outcome name="company">society/company</outcome>
           <outcome name="ecology">society/ecology</outcome>
           <outcome name="women">society/women</outcome>
           <outcome name="humanitarian">society/humanitarian</outcome>
           <outcome name="politic">society/politic</outcome>
           <outcome name="religion">society/religion</outcome>
           <outcome name="collection">daily life/collection</outcome>
           <outcome name="gastronomy">daily life/gastronomy</outcome>
           <outcome name="gardening">daily life/gardening</outcome>
           <outcome name="games">daily life/games</outcome>
           <outcome name="video games">daily life/video games</outcome>
           <outcome name="fashion">daily life/fashion</outcome>
           <outcome name="sexuality">daily life/sexuality</outcome>
           <outcome name="sport">daily life/sport</outcome>
           <outcome name="television">daily life/television</outcome>
           <outcome name="tourism">daily life/tourism</outcome>
           <outcome name="astronautic">technology/astronautic</outcome>
           <outcome name="electronic">technology/electronic</outcome>
           <outcome name="energy">technology/energy</outcome>
           <outcome name="industry">technology/industry</outcome>
           <outcome name="it">technology/it</outcome>
           <outcome name="robotic">technology/robotic</outcome>
           <outcome name="transport">technology/transport</outcome>
         </mapping>
    
       </categorizer>

XML Source

<extension target="org.nuxeo.ecm.platform.categorization.service.DocumentCategorizationService"
           point="categorizers">

    <documentation>
      Default categorizer to guess the dc:subjects property of english documents.
    </documentation>

    <!-- Contributes one categorizer, named after the dc:subjects property it targets,
         to the "categorizers" extension point of DocumentCategorizationService. -->
    <categorizer name="dc:subjects"
                 property="dc:subjects"
                 enabled="true"
                 factory="org.nuxeo.ecm.platform.categorization.categorizer.tfidf.TfIdfCategorizerFactory"
                 model="models/topics-51-tfidf-65536-model.gz"
                 maxSuggestions="3"
                 minTextLength="200"
                 precisionThreshold="1.5">

        <!-- Skip rule keyed on the Folderish facet. -->
        <skip>
            <facet name="Folderish"/>
        </skip>

        <!-- Outcome name to "group/entry" value mapping. The names and values are
             identifiers: keep their spelling exactly as written. -->
        <mapping>
            <!-- art -->
            <outcome name="architecture">art/architecture</outcome>
            <outcome name="comics">art/comics</outcome>
            <outcome name="cinema">art/cinema</outcome>
            <outcome name="culture">art/culture</outcome>
            <outcome name="danse">art/danse</outcome>
            <outcome name="art history">art/art history</outcome>
            <outcome name="literature">art/literature</outcome>
            <outcome name="music">art/music</outcome>
            <outcome name="painting">art/paint</outcome>
            <outcome name="photography">art/photography</outcome>
            <outcome name="show">art/show</outcome>

            <!-- human sciences -->
            <outcome name="rights">human sciences/rights</outcome>
            <outcome name="economy">human sciences/economy</outcome>
            <outcome name="geography">human sciences/geography</outcome>
            <outcome name="history">human sciences/history</outcome>
            <outcome name="journalism">human sciences/information</outcome>
            <outcome name="languages">human sciences/languages</outcome>
            <outcome name="phylosophy">human sciences/phylosophy</outcome>
            <outcome name="psychology">human sciences/psychology</outcome>
            <outcome name="sociology">human sciences/sociology</outcome>

            <!-- sciences -->
            <outcome name="astronomy">sciences/astronomy</outcome>
            <outcome name="biology">sciences/biology</outcome>
            <outcome name="chemistry">sciences/chemistry</outcome>
            <outcome name="information technology">sciences/it</outcome>
            <outcome name="logic">sciences/logic</outcome>
            <outcome name="math">sciences/math</outcome>
            <outcome name="medicine">sciences/medicine</outcome>
            <outcome name="physic">sciences/physic</outcome>
            <outcome name="earthscience">sciences/earthscience</outcome>

            <!-- society -->
            <outcome name="education">society/education</outcome>
            <outcome name="company">society/company</outcome>
            <outcome name="ecology">society/ecology</outcome>
            <outcome name="women">society/women</outcome>
            <outcome name="humanitarian">society/humanitarian</outcome>
            <outcome name="politic">society/politic</outcome>
            <outcome name="religion">society/religion</outcome>

            <!-- daily life -->
            <outcome name="collection">daily life/collection</outcome>
            <outcome name="gastronomy">daily life/gastronomy</outcome>
            <outcome name="gardening">daily life/gardening</outcome>
            <outcome name="games">daily life/games</outcome>
            <outcome name="video games">daily life/video games</outcome>
            <outcome name="fashion">daily life/fashion</outcome>
            <outcome name="sexuality">daily life/sexuality</outcome>
            <outcome name="sport">daily life/sport</outcome>
            <outcome name="television">daily life/television</outcome>
            <outcome name="tourism">daily life/tourism</outcome>

            <!-- technology -->
            <outcome name="astronautic">technology/astronautic</outcome>
            <outcome name="electronic">technology/electronic</outcome>
            <outcome name="energy">technology/energy</outcome>
            <outcome name="industry">technology/industry</outcome>
            <outcome name="it">technology/it</outcome>
            <outcome name="robotic">technology/robotic</outcome>
            <outcome name="transport">technology/transport</outcome>
        </mapping>

    </categorizer>

</extension>